
## Data descriptives for counties
rm(list=ls())
library(foreign)
library(xtable)
library(tidyverse)
library(reshape2)

a <- read.csv("counties_map.csv",stringsAsFactors = F)
nrow(a) # 298
a$fips_state <- gsub("(\\d+)(\\d{3})$","\\1",a$fips)
## regions:
northeast <- c(09,23,25,33,44,50,34,36,42)
south <- c(10,11,12,13,24,37,45,51,54,01,21,28,47,05,22,40,48)
west <- c(04,08,16,30,32,35,49,56,02,06,15,41,53)
midwest <- c(18,17,26,39,55,19,31,20,38,27,46,29)

demog <- read.csv("county_demog.csv") # from Social Explorer: http://www.socialexplorer.com/tables/C2000/R11348364
bea <- read.csv("bea_regions.csv",stringsAsFactors = F)

a <- merge(a,demog,by.x="fips",by.y="Geo_FIPS",all.x=T)

# finance data:
load("counties.Rdata")
findat <- data
findat <- subset(findat,YearData>=1950)

pop <- read.dta( file="county_population_census.dta")
pop <- subset(pop, year==2010)
pop <- dplyr::rename(pop, population_2010=population)  
demog <- merge(demog,pop[,c("fips","population_2010")],by.x="Geo_FIPS",by.y="fips")
      
findat <- merge(findat,pop[,c("fips","population_2010")],by="fips")

# bigcounties_list <- subset(findat,population_2010>150000 & YearData==2010)
bigcounties_list <- subset(demog,population_2010>150000 |
                                 Geo_FIPS == 15009) # adds back in Maui HI
bigcounties_list$fips <- bigcounties_list$Geo_FIPS
length(unique(bigcounties_list$fips)) # 414

## race:
# white alone: SE_T014_002
mean(a$SE_T014_002/a$SE_T014_001,na.rm=T) # 0.6528
sd(a$SE_T014_002/a$SE_T014_001,na.rm=T) # 0.1794

# black alone: SE_T014_003
mean(a$SE_T014_003/a$SE_T014_001,na.rm=T) # 0.1897
sd(a$SE_T014_003/a$SE_T014_001,na.rm=T) # 0.1839

## educ:
# college degree or more: SE_T043_005
mean(a$SE_T043_005/a$SE_T014_001,na.rm=T) # 0.1625
sd(a$SE_T043_005/a$SE_T014_001,na.rm=T) # 0.0717

## income
# median household income 1999 dollars: SE_T093_001
mean(a$SE_T093_001,na.rm=T) # 39561.03
sd(a$SE_T093_001,na.rm=T) # 10864.79

## house value
# median house value - owner occupied? SE_T163_001
mean(a$SE_T163_001,na.rm=T) # 120466.7
sd(a$SE_T163_001,na.rm=T) # 60143.86

# tab <- c(mean(a$SE_T014_001),sd(a$SE_T014_001),mean(a$Geo_REGION == 4),sd(a$Geo_REGION == 4),mean(a$Geo_REGION == 3),sd(a$Geo_REGION == 3),mean(a$Geo_REGION == 1),sd(a$Geo_REGION == 1),mean(a$SE_T014_002/a$SE_T014_001,na.rm=T),sd(a$SE_T014_002/a$SE_T014_001,na.rm=T),mean(a$SE_T014_003/a$SE_T014_001,na.rm=T),sd(a$SE_T014_003/a$SE_T014_001,na.rm=T),mean(a$SE_T043_005/a$SE_T014_001,na.rm=T),sd(a$SE_T043_005/a$SE_T014_001,na.rm=T),mean(a$SE_T093_001,na.rm=T),sd(a$SE_T093_001,na.rm=T),mean(a$SE_T163_001,na.rm=T),sd(a$SE_T163_001,na.rm=T),nrow(a))
tab <- c(as.character(c(round(mean(a$SE_T014_001),0),paste0("(",round(sd(a$SE_T014_001),0),")"))),
         c(round(100*mean(a$Geo_REGION == 4),0),paste0("(",round(100*sd(a$Geo_REGION == 4),0),")"),
           round(100*mean(a$Geo_REGION == 3),0),paste0("(",round(100*sd(a$Geo_REGION == 3),0),")"),
           round(100*mean(a$Geo_REGION == 1),0),paste0("(",round(100*sd(a$Geo_REGION == 1),0),")"),
           round(100*mean(a$SE_T014_002/a$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(a$SE_T014_002/a$SE_T014_001,na.rm=T),0),")"),
           round(100*mean(a$SE_T014_003/a$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(a$SE_T014_003/a$SE_T014_001,na.rm=T),0),")"),
           round(100*mean(a$SE_T043_005/a$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(a$SE_T043_005/a$SE_T014_001,na.rm=T),0),")")),
         c(round(mean(a$SE_T093_001,na.rm=T),0),paste0("(",round(sd(a$SE_T093_001,na.rm=T),0),")"),
           round(mean(a$SE_T163_001,na.rm=T),0),paste0("(",round(sd(a$SE_T163_001,na.rm=T),0),")")),
         nrow(a))



# xtable(data.frame(ourdata=tab,allUS=allUS,over25k=over25k),digits=2)

## calculating the US and over 100k myself:
length(unique(demog$Geo_FIPS)) # 3136
# population:
mean(demog$SE_T014_001) # 88608.42
sd(demog$SE_T014_001) # 289108.2

# regions:
# mean(demog$Geo_REGION==2) # midwest = 0.328
mean(demog$Geo_REGION==4) # west = 0.138
mean(demog$Geo_REGION==3) # south = 0.442
mean(demog$Geo_REGION==1) # northeast = 0.067

## race:
# white alone: SE_T014_002
mean(demog$SE_T014_002/demog$SE_T014_001,na.rm=T) # 0.848
sd(demog$SE_T014_002/demog$SE_T014_001,na.rm=T) # 0.212

# black alone: SE_T014_003
mean(demog$SE_T014_003/demog$SE_T014_001,na.rm=T) # 0.069
sd(demog$SE_T014_003/demog$SE_T014_001,na.rm=T) # 0.158
## educ:
# college degree or more: SE_T043_005
mean(demog$SE_T043_005/demog$SE_T014_001,na.rm=T) # 0.116
sd(demog$SE_T043_005/demog$SE_T014_001,na.rm=T) # 0.103

## income
# median household income 1999 dollars: SE_T093_001
mean(demog$SE_T093_001,na.rm=T) # 38519.86
sd(demog$SE_T093_001,na.rm=T) # 18804.84

## house value
# median house value - owner occupied? SE_T163_001
mean(demog$SE_T163_001,na.rm=T) # 95024.97
sd(demog$SE_T163_001,na.rm=T) # 92256.89

allUS <- c(as.character(c(round(mean(demog$SE_T014_001),0),paste0("(",round(sd(demog$SE_T014_001),0),")"))),
           c(round(100*mean(demog$Geo_REGION == 4),0),paste0("(",round(100*sd(demog$Geo_REGION == 4),0),")"),
             round(100*mean(demog$Geo_REGION == 3),0),paste0("(",round(100*sd(demog$Geo_REGION == 3),0),")"),
             round(100*mean(demog$Geo_REGION == 1),0),paste0("(",round(100*sd(demog$Geo_REGION == 1),0),")"),
             round(100*mean(demog$SE_T014_002/demog$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(demog$SE_T014_002/demog$SE_T014_001,na.rm=T),0),")"),
             round(100*mean(demog$SE_T014_003/demog$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(demog$SE_T014_003/demog$SE_T014_001,na.rm=T),0),")"),
             round(100*mean(demog$SE_T043_005/demog$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(demog$SE_T043_005/demog$SE_T014_001,na.rm=T),0),")")),
           c(round(mean(demog$SE_T093_001,na.rm=T),0),paste0("(",round(sd(demog$SE_T093_001,na.rm=T),0),")"),
             round(mean(demog$SE_T163_001,na.rm=T),0),paste0("(",round(sd(demog$SE_T163_001,na.rm=T),0),")")),
           length(unique(demog$Geo_FIPS)))

# allUS <- c(mean(demog$SE_T014_001),sd(demog$SE_T014_001),mean(demog$Geo_REGION==4),sd(demog$Geo_REGION==4),mean(demog$Geo_REGION==3),sd(demog$Geo_REGION==3),mean(demog$Geo_REGION==1),sd(demog$Geo_REGION==1),mean(demog$SE_T014_002/demog$SE_T014_001,na.rm=T),sd(demog$SE_T014_002/demog$SE_T014_001,na.rm=T),mean(demog$SE_T014_003/demog$SE_T014_001,na.rm=T),sd(demog$SE_T014_003/demog$SE_T014_001,na.rm=T), mean(demog$SE_T043_005/demog$SE_T014_001,na.rm=T),sd(demog$SE_T043_005/demog$SE_T014_001,na.rm=T),mean(demog$SE_T093_001,na.rm=T),sd(demog$SE_T093_001,na.rm=T),mean(demog$SE_T163_001,na.rm=T),sd(demog$SE_T163_001,na.rm=T),length(unique(demog$Geo_FIPS)))

big <- demog[demog$SE_T014_001>100000,]
bigger <- subset(demog,Geo_FIPS %in% bigcounties_list$fips & # population over 150k in 2010 based on census data
                       Geo_STATE != 6 & # exclude nonpartisan states:
                       Geo_STATE != 27 & 
                       Geo_STATE != 55 & 
                       Geo_STATE != 2 & 
                       Geo_STATE != 46 & 
                       Geo_STATE != 38 & 
                       Geo_STATE != 22) # (CA, MN, WI, AK, SD, ND, and LA)
bigger <- subset(bigger,Geo_FIPS != 12073 & Geo_FIPS != 12127) #  drop nonpartisan FL counties
bigger <- subset(bigger,Geo_FIPS != 53073 & # drop other nonpartisan ones: Whatcom County
                       Geo_FIPS != 41067   # Washington, OR
                 )
bigger <- subset(bigger,Geo_FIPS != 9001 & # remove nonfunctional CT counties
                       Geo_FIPS != 9003 &
                       Geo_FIPS != 9005 &
                       Geo_FIPS != 9007 &
                       Geo_FIPS != 9009 &
                       Geo_FIPS != 9011 &
                       Geo_FIPS != 9013 & 
                       Geo_FIPS != 44003 & # nonfunctional RI ones (Kent + Providence)
                       Geo_FIPS != 44007 & 
                       Geo_FIPS != 50007 # nonfunctional VT (Chittenden)
)
bigger <- subset(bigger,Geo_FIPS != 24510 & # remove consolodated counties: Baltimore
                       Geo_FIPS != 47037 & # Davidson
                       Geo_FIPS != 8031 & # Denver
                       Geo_FIPS != 12031 & # Duval
                       Geo_FIPS != 13215 & # Muscogee
                       Geo_FIPS != 13245 & # Richmond GA
                       Geo_FIPS != 15003 & # Honolulu
                       Geo_FIPS != 18097 & # Marion
                       Geo_FIPS != 42101 & # Philadelphia
                       Geo_FIPS != 13021 & # Bibb
                       Geo_FIPS != 29510 & # St Louis
                       Geo_FIPS != 51550 & # Virginia cities: Chesapeake
                       Geo_FIPS != 51700 & # Newport News
                       Geo_FIPS != 51710 & # Norfolk
                       Geo_FIPS != 51760 & # Richmond
                       Geo_FIPS != 51810  # Virginia Beach
) 
bigger <- subset(bigger,Geo_FIPS != 36005 & # remove non-independent NYC counties:
                       Geo_FIPS != 36047 & 
                       Geo_FIPS != 36061 & 
                       Geo_FIPS != 36081 & 
                       Geo_FIPS != 36085
)
length(unique(bigger$Geo_FIPS)) # 319 in target universe
# write.csv(bigger[-which(bigger$Geo_FIPS %in% a$fips),],"temp/missingcounties.csv",row.names = F)

over100k <- c(as.character(c(round(mean(big$SE_T014_001),0),paste0("(",round(sd(big$SE_T014_001),0),")"))),
              c(round(100*mean(big$Geo_REGION == 4),0),paste0("(",round(100*sd(big$Geo_REGION == 4),0),")"),
                round(100*mean(big$Geo_REGION == 3),0),paste0("(",round(100*sd(big$Geo_REGION == 3),0),")"),
                round(100*mean(big$Geo_REGION == 1),0),paste0("(",round(100*sd(big$Geo_REGION == 1),0),")"),
                round(100*mean(big$SE_T014_002/big$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(big$SE_T014_002/big$SE_T014_001,na.rm=T),0),")"),
                round(100*mean(big$SE_T014_003/big$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(big$SE_T014_003/big$SE_T014_001,na.rm=T),0),")"),
                round(100*mean(big$SE_T043_005/big$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(big$SE_T043_005/big$SE_T014_001,na.rm=T),0),")")),
              c(round(mean(big$SE_T093_001,na.rm=T),0),paste0("(",round(sd(big$SE_T093_001,na.rm=T),0),")"),
                round(mean(big$SE_T163_001,na.rm=T),0),paste0("(",round(sd(big$SE_T163_001,na.rm=T),0),")")),
              length(unique(big$Geo_FIPS)))

over150k <- c(as.character(c(round(mean(bigger$SE_T014_001),0),paste0("(",round(sd(bigger$SE_T014_001),0),")"))),
              c(round(100*mean(bigger$Geo_REGION == 4),0),paste0("(",round(100*sd(bigger$Geo_REGION == 4),0),")"),
                round(100*mean(bigger$Geo_REGION == 3),0),paste0("(",round(100*sd(bigger$Geo_REGION == 3),0),")"),
                round(100*mean(bigger$Geo_REGION == 1),0),paste0("(",round(100*sd(bigger$Geo_REGION == 1),0),")"),
                round(100*mean(bigger$SE_T014_002/bigger$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(bigger$SE_T014_002/bigger$SE_T014_001,na.rm=T),0),")"),
                round(100*mean(bigger$SE_T014_003/bigger$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(bigger$SE_T014_003/bigger$SE_T014_001,na.rm=T),0),")"),
                round(100*mean(bigger$SE_T043_005/bigger$SE_T014_001,na.rm=T),0),paste0("(",round(100*sd(bigger$SE_T043_005/bigger$SE_T014_001,na.rm=T),0),")")),
              c(round(mean(bigger$SE_T093_001,na.rm=T),0),paste0("(",round(sd(bigger$SE_T093_001,na.rm=T),0),")"),
                      round(mean(bigger$SE_T163_001,na.rm=T),0),paste0("(",round(sd(bigger$SE_T163_001,na.rm=T),0),")")),
              length(unique(bigger$Geo_FIPS)))


names <- c("Population","" ,"% West","" ,"% South","" ,"% Northeast","" ,"% White","" ,"% Black","" ,"% College degree or more","" , "Median household income","" , "Median home value","" , "Number of counties")

summary_table <- data.frame(rownames=names,
                  ourdata=tab,
                  over150k=over150k,
                  allUS=allUS
)
colnames(summary_table) <- c("","Final Sample",">150k population","All U.S. counties")
print(xtable(summary_table,
             caption = "County Summary Statistics",
             label="tab:descriptives"),
      file="Tables/descriptive_comparison.tex",
      include.rownames=F,include.colnames=T,floating = FALSE,
      # align=c("r","c","c","c"),
      # hline.after=NULL, add.to.row=list(pos=list(0,nrow(summary_table)),command=c('\\toprule\n','\\bottomrule\n'))
      booktabs=T)

## what percentage of total US is in our sample?
# total population in all counties:
sum(demog$SE_T014_001) # 285,230,516
sum(bigger$SE_T014_001) # 88,154,997
sum(bigger$SE_T014_001)/sum(demog$SE_T014_001) # 0.479
(pop.coverage <- sum(a$SE_T014_001,na.rm=T)/sum(demog$SE_T014_001)) # 0.46
(pop.coverage.150k <- sum(bigger$SE_T014_001,na.rm=T)/sum(demog$SE_T014_001)) # 0.48
sum(demog$SE_T014_001)/2
# writeLines(as.character(length(unique(a$fips))),"Tables/n_counties.tex")
writeLines(as.character(length(unique(a$fips))),"Tables/n_counties.tex")
writeLines(as.character(nrow(bigger)),"Tables/n_counties150k.tex")
writeLines(as.character(length(unique(a$fips_state))),"Tables/n_states.tex")
writeLines(as.character(floor(pop.coverage*100)),"Tables/population_coverage_floor.tex")
writeLines(as.character(round(pop.coverage.150k*100,1)),"Tables/population_coverage_150k.tex")
writeLines(as.character(length(unique(a$fips[a$yearmax>=2000]))),"Tables/n_counties_post2000.tex")
writeLines(as.character(length(unique(a$fips[a$yearmin<=1990]))),"Tables/n_counties_1990.tex")

head(a[order(a$SE_T014_001,decreasing=F),]) # example small counties: Hays County TX, Forsyth County, GA, Delaware County, OH
head(a[order(a$SE_T014_001,decreasing=T),]) # example large counties: LA County (no longer in there?), Cook County IL, Harris County TX, Maricopa County, AZ, Miami-Dade County

# checking significance of balance across sample/target population:
t.test(bigger$SE_T014_001,a$SE_T014_001) # p=0.2234
t.test(a$fips_state %in% west,bigger$Geo_STATE %in% west) # p=0.56
t.test(a$fips_state %in% south,bigger$Geo_STATE %in% south) # p=0.54
t.test(a$fips_state %in% northeast,bigger$Geo_STATE %in% northeast) # p=0.2951
t.test(a$SE_T014_002/a$SE_T014_001,bigger$SE_T014_002/bigger$SE_T014_001) # p=0.99 % white
t.test(a$SE_T014_003/a$SE_T014_001, bigger$SE_T014_003/bigger$SE_T014_001) # p=0.879 % black
t.test(a$SE_T043_005/a$SE_T014_001,bigger$SE_T043_005/bigger$SE_T014_001) # p=0.86 % college educated
t.test(a$SE_T093_001,bigger$SE_T093_001) # p=0.698 median income
t.test(a$SE_T163_001,bigger$SE_T163_001) # p=0.70 median home value
# against whole US:
t.test(a$SE_T014_001,demog$SE_T014_001) # p<0.01
t.test(a$fips_state %in% west,demog$Geo_STATE %in% west) # p=0.92
t.test(a$fips_state %in% south,demog$Geo_STATE %in% south) # p=0.20
t.test(a$fips_state %in% northeast,demog$Geo_STATE %in% northeast) # p<0.01
t.test(a$SE_T014_002/a$SE_T014_001,demog$SE_T014_002/demog$SE_T014_001) # p<0.01 % white
t.test(a$SE_T014_003/a$SE_T014_001, demog$SE_T014_003/demog$SE_T014_001) # p=0.002 % black
t.test(a$SE_T043_005/a$SE_T014_001,demog$SE_T043_005/demog$SE_T014_001) # p<0.01 % college educated
t.test(a$SE_T093_001,demog$SE_T093_001) # p<0.01 median income
t.test(a$SE_T163_001,demog$SE_T163_001) # p<0.01 median home value

### Descriptives for finances data:

ourdat <- subset(findat,fips %in% a$fips)
length(unique(ourdat$fips)) # 296
# maxyear <- ourdat %>%
#       group_by(fips) %>%
#       summarize(maxyear = max(YearData))


tabfin <- c(
      as.character(c(round(mean(ourdat$Total.Expenditure.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$Total.Expenditure.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$education.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$education.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$fire.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$fire.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$police.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$police.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$health.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$health.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$highways.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$highways.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$housing.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$housing.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$libraries.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$libraries.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$parks.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$parks.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$sanitation.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$sanitation.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$utilities.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$utilities.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$welfare.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$welfare.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$interest.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$interest.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$admin.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$admin.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      
      as.character(c(round(mean(ourdat$revenue.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$revenue.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$revenueown.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$revenueown.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$Total.Taxes.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$Total.Taxes.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$salestax.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$salestax.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$propertytax.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$propertytax.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$debt.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$debt.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(ourdat$igrev.PC[ourdat$YearData==2012],na.rm=T),0),paste0("(",round(sd(ourdat$igrev.PC[ourdat$YearData==2012],na.rm=T),0),")"))),
      
      length(unique(ourdat$fips[ourdat$YearData==2012])))


bigger <- subset(findat,fips %in% bigcounties_list$fips & # subset to counties over 150k in 2010
                       fips_state != 6 & 
                       fips_state != 27 & 
                       fips_state != 55 & 
                       fips_state != 2 & 
                       fips_state != 46 & 
                       fips_state != 38 & 
                       fips_state != 22) # exclude CA, MN, WI, AK, SD, ND, and LA,

bigger <- subset(bigger,fips != 12073 & fips != 12127) #  as well as drop nonpartisan FL ones
bigger <- subset(bigger,fips != 53073 & # drop other nonpartisan ones: Whatcom County
                       fips != 41067   # Washington, OR
)
bigger <- subset(bigger,fips != 9001 & # remove nonfunctional CT counties
                       fips != 9003 &
                       fips != 9005 &
                       fips != 9007 &
                       fips != 9009 &
                       fips != 9011 &
                       fips != 9013 & 
                       fips != 44003 & # nonfunctional RI ones (Kent + Providence)
                       fips != 44007 & 
                       fips != 50007 # nonfunctional VT (Chittenden)
)
bigger <- subset(bigger,fips != 24510 & # remove consolodated counties: Baltimore
                       fips != 47037 & # Davidson
                       fips != 8031 & # Denver
                       fips != 12031 & # Duval
                       fips != 13215 & # Muscogee
                       fips != 13245 & # Richmond GA
                       fips != 15003 & # Honolulu
                       fips != 18097 & # Marion
                       fips != 42101 & # Philadelphia
                       fips != 13021 & # Bibb
                       fips != 29510 & # St Louis
                       fips != 51550 & # Virginia cities: Chesapeake
                       fips != 51700 & # Newport News
                       fips != 51710 & # Norfolk
                       fips != 51760 & # Richmond
                       fips != 51810  # Virginia Beach
) 
bigger <- subset(bigger,fips != 36005 & # remove non-independent NYC counties:
                       fips != 36047 & 
                       fips != 36061 & 
                       fips != 36081 & 
                       fips != 36085
)
length(unique(bigger$fips)) # 318

over150kfin <- c(
      as.character(c(round(mean(bigger$Total.Expenditure.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$Total.Expenditure.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$education.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$education.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$fire.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$fire.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$police.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$police.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$health.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$health.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$highways.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$highways.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$housing.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$housing.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$libraries.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$libraries.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$parks.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$parks.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$sanitation.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$sanitation.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$utilities.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$utilities.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$welfare.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$welfare.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$interest.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$interest.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$admin.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$admin.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      
      as.character(c(round(mean(bigger$revenue.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$revenue.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$revenueown.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$revenueown.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$Total.Taxes.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$Total.Taxes.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$salestax.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$salestax.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$propertytax.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$propertytax.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$debt.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$debt.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(bigger$igrev.PC[bigger$YearData==2012],na.rm=T),0),paste0("(",round(sd(bigger$igrev.PC[bigger$YearData==2012],na.rm=T),0),")"))),
      
      length(unique(bigger$fips[bigger$YearData==2012])))

allUSfin <- c(
      as.character(c(round(mean(findat$Total.Expenditure.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$Total.Expenditure.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$education.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$education.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$fire.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$fire.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$police.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$police.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$health.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$health.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$highways.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$highways.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$housing.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$housing.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$libraries.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$libraries.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$parks.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$parks.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$sanitation.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$sanitation.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$utilities.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$utilities.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$welfare.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$welfare.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$interest.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$interest.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$admin.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$admin.PC[findat$YearData==2012],na.rm=T),0),")"))),
      
      as.character(c(round(mean(findat$revenue.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$revenue.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$revenueown.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$revenueown.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$Total.Taxes.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$Total.Taxes.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$salestax.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$salestax.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$propertytax.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$propertytax.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$debt.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$debt.PC[findat$YearData==2012],na.rm=T),0),")"))),
      as.character(c(round(mean(findat$igrev.PC[findat$YearData==2012],na.rm=T),0),paste0("(",round(sd(findat$igrev.PC[findat$YearData==2012],na.rm=T),0),")"))),
      
      length(unique(findat$fips[findat$YearData==2012]))
      )
      

names <- c("Total Expenditures","" ,"Education","","Fire","","Police","","Health","","Highways","","Housing","", "Libraries","","Parks","","Sanitation","","Utilities","","Welfare","","Interest","","Administration","",
           "Total Revenues","","Own Sources","","Total Taxes","","Sales Taxes","","Property Taxes","","Debt","","Intergovernmental","",
           "Number of counties")

finsummary_table <- data.frame(rownames=names,
                            ourdata=tabfin,
                            over150k=over150kfin,
                            allUS=allUSfin
)
colnames(finsummary_table) <- c("","Final Sample",">150k population","All U.S. counties")
print(xtable(finsummary_table,
             caption = "County Financial Summary Statistics, 2012",
             label="tab:fin_descriptives"),
      file="Tables/descriptives_fin_comparison.tex",
      include.rownames=F,include.colnames=T,floating = FALSE,
      # align=c("r","c","c","c"),
      # hline.after=NULL, add.to.row=list(pos=list(0,nrow(summary_table)),command=c('\\toprule\n','\\bottomrule\n'))
      booktabs=T)

# percentage terms:
tabfin_perc <- c(
      # as.character(c(round(mean(100*ourdat$Total.Expenditure.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$Total.Expenditure.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$education.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$education.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$fire.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$fire.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$police.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$police.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$health.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$health.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$highways.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$highways.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$housing.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$housing.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$libraries.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$libraries.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$parks.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$parks.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$sanitation.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$sanitation.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$utilities.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$utilities.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$welfare.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$welfare.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$interest.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$interest.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$admin.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$admin.perc,na.rm=T),2),")"))),
      # as.character(c(round(mean(100*ourdat$revenue.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$revenue.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$revenueown.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$revenueown.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$taxes.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$taxes.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$salestax.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$salestax.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$propertytax.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$propertytax.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$debt.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$debt.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*ourdat$igrev.perc,na.rm=T),2),paste0("(",round(sd(100*ourdat$igrev.perc,na.rm=T),2),")"))),
      length(unique(ourdat$fips)))

over150kfin_perc <- c(
      # as.character(c(round(mean(100*bigger$Total.Expenditure.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$Total.Expenditure.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$education.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$education.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$fire.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$fire.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$police.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$police.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$health.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$health.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$highways.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$highways.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$housing.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$housing.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$libraries.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$libraries.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$parks.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$parks.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$sanitation.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$sanitation.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$utilities.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$utilities.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$welfare.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$welfare.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$interest.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$interest.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$admin.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$admin.perc,na.rm=T),2),")"))),
      
      # as.character(c(round(mean(100*bigger$revenue.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$revenue.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$revenueown.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$revenueown.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$taxes.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$taxes.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$salestax.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$salestax.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$propertytax.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$propertytax.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$debt.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$debt.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*bigger$igrev.perc,na.rm=T),2),paste0("(",round(sd(100*bigger$igrev.perc,na.rm=T),2),")"))),
      
      length(unique(bigger$fips)))

allUSfin_perc <- c(
      # as.character(c(round(mean(100*findat$Total.Expenditure.perc,na.rm=T),2),paste0("(",round(sd(100*findat$Total.Expenditure.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$education.perc,na.rm=T),2),paste0("(",round(sd(100*findat$education.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$fire.perc,na.rm=T),2),paste0("(",round(sd(100*findat$fire.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$police.perc,na.rm=T),2),paste0("(",round(sd(100*findat$police.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$health.perc,na.rm=T),2),paste0("(",round(sd(100*findat$health.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$highways.perc,na.rm=T),2),paste0("(",round(sd(100*findat$highways.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$housing.perc,na.rm=T),2),paste0("(",round(sd(100*findat$housing.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$libraries.perc,na.rm=T),2),paste0("(",round(sd(100*findat$libraries.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$parks.perc,na.rm=T),2),paste0("(",round(sd(100*findat$parks.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$sanitation.perc,na.rm=T),2),paste0("(",round(sd(100*findat$sanitation.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$utilities.perc,na.rm=T),2),paste0("(",round(sd(100*findat$utilities.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$welfare.perc,na.rm=T),2),paste0("(",round(sd(100*findat$welfare.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$interest.perc,na.rm=T),2),paste0("(",round(sd(100*findat$interest.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$admin.perc,na.rm=T),2),paste0("(",round(sd(100*findat$admin.perc,na.rm=T),2),")"))),
      
      # as.character(c(round(mean(100*findat$revenue.perc,na.rm=T),2),paste0("(",round(sd(100*findat$revenue.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$revenueown.perc,na.rm=T),2),paste0("(",round(sd(100*findat$revenueown.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$taxes.perc,na.rm=T),2),paste0("(",round(sd(100*findat$taxes.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$salestax.perc,na.rm=T),2),paste0("(",round(sd(100*findat$salestax.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$propertytax.perc,na.rm=T),2),paste0("(",round(sd(100*findat$propertytax.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$debt.perc,na.rm=T),2),paste0("(",round(sd(100*findat$debt.perc,na.rm=T),2),")"))),
      as.character(c(round(mean(100*findat$igrev.perc,na.rm=T),2),paste0("(",round(sd(100*findat$igrev.perc,na.rm=T),2),")"))),
      
      length(unique(findat$fips)))

names_perc <- c("Education","","Fire","","Police","","Health","","Highways","","Housing","", "Libraries","","Parks","","Sanitation","","Utilities","","Welfare","","Interest","","Administration","",
           "Own Sources","","Total Taxes","","Sales Taxes","","Property Taxes","","Debt","","Intergovernmental","",
           "Number of counties")

finsummary_perc_table <- data.frame(rownames=names_perc,
                               ourdata=tabfin_perc,
                               over150k=over150kfin_perc,
                               allUS=allUSfin_perc
)
colnames(finsummary_perc_table) <- c("","Final Sample",">150k population","All U.S. counties")
# print(xtable(finsummary_perc_table,
#              caption = "County Financial Summary Statistics",
#              label="tab:fin_descriptives_perc"),
#       file="Tables/descriptives_fin_perc_comparison.tex",
#       include.rownames=F,include.colnames=T,floating = FALSE,
#       # align=c("r","c","c","c"),
#       # hline.after=NULL, add.to.row=list(pos=list(0,nrow(summary_table)),command=c('\\toprule\n','\\bottomrule\n'))
#       booktabs=T)

# findat2 <- subset(findat,YearData==2015)

# average total spending:
writeLines(prettyNum(round(mean(findat$Total.Expenditure.PC[findat$YearData==1990],na.rm=T),0),big.mark=","),"Tables/mean_expenditures_total_1990.tex")
writeLines(prettyNum(round(mean(findat$Total.Expenditure.PC[findat$YearData==2014],na.rm=T),0),big.mark=","),"Tables/mean_expenditures_total_2014.tex")
writeLines(prettyNum(round(mean(findat$revenue.PC[findat$YearData==1990],na.rm=T),0),big.mark=","),"Tables/mean_revenue_total_1990.tex")
writeLines(prettyNum(round(mean(findat$revenue.PC[findat$YearData==2014],na.rm=T),0),big.mark=","),"Tables/mean_revenue_total_2014.tex")
writeLines(prettyNum(round(mean(findat$debt.PC[findat$YearData==1990],na.rm=T),0),big.mark=","),"Tables/mean_debt_total_1990.tex")
writeLines(prettyNum(round(mean(findat$debt.PC[findat$YearData==2014],na.rm=T),0),big.mark=","),"Tables/mean_debt_total_2014.tex")

finsumm_expPC_yearly_bigger <- subset(bigger,YearData>=1990 & YearData<=2014) %>%
	group_by(YearData) %>%
	summarize(
                                     # total.exp = mean(Total.Expenditure.PC,na.rm=T),
                                     Education = mean(education.PC,na.rm=T),
                                     Fire = mean(fire.PC,na.rm=T),
                                     Police = mean(police.PC,na.rm=T),
                                     Health = mean(health.PC,na.rm=T),
                                     Highways = mean(highways.PC,na.rm=T),
                                     Housing = mean(housing.PC,na.rm=T),
                                     Libraries = mean(libraries.PC,na.rm=T),
                                     Parks = mean(parks.PC,na.rm=T),
                                     Sanitation = mean(sanitation.PC,na.rm=T),
                                     Utilities = mean(utilities.PC,na.rm=T),
                                     Welfare = mean(welfare.PC,na.rm=T),
                                     Interest = mean(interest.PC,na.rm=T),
                                     # nonhighwaytransit = mean(nonhighwaytransit.perc,na.rm=T),
                                     Administration = mean(admin.PC,na.rm=T)
)
finsumm_expPC_yearly_bigger <- melt(finsumm_expPC_yearly_bigger,id.vars = "YearData")
finsumm_expPC_yearly_bigger$YearData <- as.numeric(finsumm_expPC_yearly_bigger$YearData)

####
## Figure A1: Expenditures over time, individual categories (target universe)
####
pdf("Figures/figureA1_financialsummary_expPC_bigger.pdf",height=6,width=10)
ggplot(finsumm_expPC_yearly_bigger) + 
      geom_path(aes(x=YearData,group=variable,linetype=variable,y=value),size=1.75) + 
      scale_y_continuous("Per capita expenditures (2012 dollars)") + 
      scale_x_continuous("Year",breaks = seq(1970,2015,5),limits=c(1992,2017)) + 
      theme_bw() + 
      theme(legend.position='none') +
      geom_text(data=finsumm_expPC_yearly_bigger[which(finsumm_expPC_yearly_bigger$YearData==2014 & finsumm_expPC_yearly_bigger$variable!="Sanitation"),],aes(x=YearData,y=value,label = variable),hjust=0,size=5) + 
      geom_text(data=finsumm_expPC_yearly_bigger[which(finsumm_expPC_yearly_bigger$YearData==2012 & finsumm_expPC_yearly_bigger$variable=="Sanitation"),],aes(x=YearData,y=value,label = variable),hjust=0,vjust=0,size=5) + 
      # annotate("text", label = "Expenditure Area", x = 2015, y = 225, size = 6, colour = "black") + 
      theme(text=element_text(size=16))
dev.off()


finsumm_expPC_yearly_bigger <- subset(bigger,YearData>=1990 & YearData<=2014) %>% 
	group_by(YearData) %>%
	summarize(
		# total.exp = mean(Total.Expenditure.PC,na.rm=T),
		Corrections = mean(corrections.PC,na.rm=T),
		Education = mean(education.PC,na.rm=T),
		Fire = mean(fire.PC,na.rm=T),
		Police = mean(police.PC,na.rm=T),
		Health = mean(health.PC,na.rm=T),
		Hospitals = mean(hospitals.pc,na.rm=T),
		Highways = mean(highways.PC,na.rm=T),
		Housing = mean(housing.PC,na.rm=T),
		Interest = mean(interest.PC,na.rm=T),
		Libraries = mean(libraries.PC,na.rm=T),
		NaturalResources = mean(naturalresources.PC,na.rm=T),
		Parks = mean(parks.PC,na.rm=T),
		Parking = mean(parking.PC,na.rm=T),
		Sanitation = mean(sanitation.PC,na.rm=T),
		Utilities = mean(utilities.PC,na.rm=T),
		Welfare = mean(welfare.PC,na.rm=T),
		Interest = mean(interest.PC,na.rm=T),
		# nonhighwaytransit = mean(nonhighwaytransit.perc,na.rm=T),
		Administration = mean(admin.PC,na.rm=T),
		Misc = mean(nec.PC,na.rm=T),
		Allocational = Fire + Police + Corrections + Interest + Administration + Parks + NaturalResources + Libraries + Misc,
		Developmental = Highways + Education + Sanitation + Utilities,
		'Redistribution (Health,\nHospitals, Housing, & Welfare)' = Health + Hospitals + Welfare + Housing,
		'Parks and Natural Resources' = Parks + NaturalResources,
		'Education and Libraries' = Education + Libraries,
		'Sanitation and Utilities' = Sanitation + Utilities,
		'Admin. and Misc.' = Administration + Misc,
		'Police, Fire,\nand Corrections' = Police + Fire + Corrections,
		Roads = Highways + Parking
	)
finsumm_expPC_yearly_bigger <- melt(finsumm_expPC_yearly_bigger,id.vars = "YearData")
finsumm_expPC_yearly_bigger$YearData <- as.numeric(finsumm_expPC_yearly_bigger$YearData)

####
## Figure 3: Expenditures per capita over time, individual categories (target universe)
####
pdf("Figures/figure3_financialsummary_expPC_grouped.pdf",height=6,width=10)
ggplot(subset(finsumm_expPC_yearly_bigger,variable %in% c(
      'Parks and Natural Resources',
      "Roads",
      'Education and Libraries',
      'Sanitation and Utilities',
      'Redistribution (Health,\nHospitals, Housing, & Welfare)',
      'Admin. and Misc.',
      'Police, Fire,\nand Corrections',
      "Interest"
))) + 
      geom_path(aes(x=YearData,group=variable,linetype=variable,y=value),size=1.75) + 
      scale_y_continuous("Per capita expenditures (2012 dollars)") + 
      scale_x_continuous("Year",breaks = seq(1970,2015,5),limits=c(1992,2022)) + 
      theme_bw() + 
      theme(legend.position='none') +
      geom_text(data=subset(finsumm_expPC_yearly_bigger,YearData==2014 & variable %in% c(
            'Parks and Natural Resources',
            # "Roads",
            'Education and Libraries',
            'Sanitation and Utilities',
            'Redistribution (Health,\nHospitals, Housing, & Welfare)',
            'Admin. and Misc.',
            'Police, Fire,\nand Corrections',
            "Interest"
      )),aes(x=YearData,y=value,label = variable,lineheight=0.75),hjust=0,size=5) + 
      geom_text(data=subset(finsumm_expPC_yearly_bigger,YearData==2014 & variable %in% c(
            "Roads"
      )),aes(x=YearData,y=value,label = variable,lineheight=0.75),hjust=0,vjust=-0.5,size=5) + 
      theme(text=element_text(size=16))
dev.off()


finsumm_revPC_yearly_bigger <- subset(bigger,YearData>=1990 & YearData<=2014) %>%
	group_by(YearData) %>%
	summarize(
		# 'Total Exp' = mean(Total.Expenditure.PC,na.rm=T),
		'Total Rev.' = mean(revenue.PC,na.rm=T),
		# 'Own-Source Rev.' = mean(revenueown.PC,na.rm=T),
		'Total Taxes' = mean(Total.Taxes.PC,na.rm=T),
		'Property Tax' = mean(propertytax.PC,na.rm=T),
		'Sales Tax' = mean(salestax.PC,na.rm=T),
		# igrev.state = mean(igrev.state.PC,na.rm=T),
		Intergovernmental = mean(igrev.PC,na.rm=T),
		# FTE = mean(FTE.PC,na.rm=T),
		# Employees = mean(employees.PC,na.rm=T),
		Debt = mean(debt.PC,na.rm=T),
		# gendebt = mean(generaldebt.PC,na.rm=T),
		Charges = mean(charges.PC,na.rm=T),
		Utilities = mean(utilitiesrev.PC,na.rm=T)
	)
finsumm_revPC_yearly_bigger <- melt(finsumm_revPC_yearly_bigger,id.vars = "YearData")
finsumm_revPC_yearly_bigger$YearData <- as.numeric(finsumm_revPC_yearly_bigger$YearData)

####
## Figure 4: Revenue per capita over time, individual categories (target universe)
####
pdf("Figures/figure4_financialsummary_revPC_bigger.pdf",height=6,width=10)
ggplot(finsumm_revPC_yearly_bigger) + 
      geom_path(aes(x=YearData,group=variable,linetype=variable,y=value),lwd=2) + 
      scale_y_continuous("Per capita revenue (2012 dollars)") + 
      scale_x_continuous("Year",breaks = seq(1990,2015,5),limits=c(1992,2018)) + 
      theme_bw() + 
      theme(legend.position='none') +
      # geom_text(data=finsumm_revPC_yearly_bigger[which(finsumm_revPC_yearly_bigger$YearData==2014),],
      #           aes(x=YearData,y=value,label = variable),hjust=0,size=5) + 
            geom_text(data=finsumm_revPC_yearly_bigger[which(finsumm_revPC_yearly_bigger$YearData==2014
                                                             & finsumm_revPC_yearly_bigger$variable != "Intergovernmental"
                                                             ),],
                aes(x=YearData,y=value,label = variable),hjust=0,size=5) +
      geom_text(data=finsumm_revPC_yearly_bigger[which(finsumm_revPC_yearly_bigger$YearData==2014 & finsumm_revPC_yearly_bigger$variable %in% c("Intergovernmental")),],aes(x=YearData,y=value,label = variable),hjust=0,nudge_y = 20,size=5) +
      # annotate("text", label = "Expenditure Area", x = 2015, y = 0.25, size = 6, colour = "black") + 
      theme(text=element_text(size=16))
dev.off()


writeLines(text=as.character(floor(100*(mean(bigger$education.PC[bigger$YearData==2012],na.rm=T)-mean(bigger$education.PC[bigger$YearData==1990],na.rm=T))/mean(bigger$education.PC[bigger$YearData==1990],na.rm=T))),con="Tables/educ_increase_perc_1990-2012.tex",sep = "")
writeLines(text=as.character(floor(100*(mean(bigger$health.PC[bigger$YearData==2012],na.rm=T)-mean(bigger$health.PC[bigger$YearData==1990],na.rm=T))/mean(bigger$health.PC[bigger$YearData==1990],na.rm=T))),con="Tables/health_increase_perc_1990-2012.tex",sep = "")
# grouped:
writeLines(text=as.character(floor(100*(mean(
      (bigger$health.PC[bigger$YearData==2012] + bigger$hospitals.pc[bigger$YearData==2012] + bigger$welfare.PC[bigger$YearData==2012] + bigger$housing.PC[bigger$YearData==2012]) # Health + Hospitals + Welfare + Housing
      ,na.rm=T)-mean(
            (bigger$health.PC[bigger$YearData==1990] + bigger$hospitals.pc[bigger$YearData==1990] + bigger$welfare.PC[bigger$YearData==1990] + bigger$housing.PC[bigger$YearData==1990])
            ,na.rm=T))/mean((bigger$health.PC[bigger$YearData==1990] + bigger$hospitals.pc[bigger$YearData==1990] + bigger$welfare.PC[bigger$YearData==1990] + bigger$housing.PC[bigger$YearData==1990]),na.rm=T))),con="Tables/redist_increase_perc_1990-2012.tex",sep = "")
writeLines(text=as.character(floor(100*(mean(
      (bigger$education.PC[bigger$YearData==2012] + bigger$libraries.PC[bigger$YearData==2012])
      ,na.rm=T)-mean(
            (bigger$education.PC[bigger$YearData==1990] + bigger$libraries.PC[bigger$YearData==1990])
             ,na.rm=T))/mean((bigger$education.PC[bigger$YearData==1990] + bigger$libraries.PC[bigger$YearData==1990]),na.rm=T))),con="Tables/educlib_increase_perc_1990-2012.tex",sep = "")
writeLines(text=as.character(floor(100*(mean(
      (bigger$police.PC[bigger$YearData==2012] + bigger$fire.PC[bigger$YearData==2012] + bigger$corrections.PC[bigger$YearData==2012])
      ,na.rm=T)-mean(
            (bigger$police.PC[bigger$YearData==1990] + bigger$fire.PC[bigger$YearData==1990] + bigger$corrections.PC[bigger$YearData==1990])
            ,na.rm=T))/mean((bigger$police.PC[bigger$YearData==1990] + bigger$fire.PC[bigger$YearData==1990] + bigger$corrections.PC[bigger$YearData==1990]),na.rm=T))),con="Tables/pubsafety_increase_perc_1990-2012.tex",sep = "")

writeLines(as.character(round(100*mean(findat$debt.perc[findat$YearData==1995],na.rm=T),0)),"Tables/mean_debt_perc_1995.tex")
writeLines(as.character(round(100*mean(findat$debt.perc[findat$YearData==2002],na.rm=T),0)),"Tables/mean_debt_perc_2002.tex")
# writeLines(as.character(round(100*mean(findat$debt.perc[findat$YearData==2007],na.rm=T),0)),"Tables/mean_debt_perc_2007.tex")
writeLines(as.character(round(100*mean(findat$debt.perc[findat$YearData==2014],na.rm=T),0)),"Tables/mean_debt_perc_2014.tex")
writeLines(as.character(round(100*mean(findat$debt.perc[findat$YearData==2008],na.rm=T),0)),"Tables/mean_debt_perc_2008.tex")



writeLines(as.character(prettyNum(round(mean(bigger$debt.PC[bigger$YearData==1990],na.rm=T),0),big.mark=",")),"Tables/biggermean_debt_PC_1990.tex")
writeLines(as.character(prettyNum(round(mean(bigger$debt.PC[bigger$YearData==1995],na.rm=T),0),big.mark=",")),"Tables/biggermean_debt_PC_1995.tex")
writeLines(as.character(prettyNum(round(mean(bigger$debt.PC[bigger$YearData==2002],na.rm=T),0),big.mark=",")),"Tables/biggermean_debt_PC_2002.tex")
# writeLines(as.character(prettyNum(round(mean(bigger$debt.PC[bigger$YearData==2007],na.rm=T),0),big.mark=",")),"Tables/biggermean_debt_PC_2007.tex")
writeLines(as.character(prettyNum(round(mean(bigger$debt.PC[bigger$YearData==2014],na.rm=T),0),big.mark=",")),"Tables/biggermean_debt_PC_2014.tex")
writeLines(as.character(prettyNum(round(mean(bigger$debt.PC[bigger$YearData==2009],na.rm=T),0),big.mark=",")),"Tables/biggermean_debt_PC_2009.tex")
writeLines(as.character(prettyNum(round(mean(bigger$revenue.PC[bigger$YearData==2009],na.rm=T),0),big.mark=",")),"Tables/biggermean_rev_PC_2009.tex")






## Over-time partisanship of county councils:
library(ggrepel)
source("geo_functions.R")

# a <- read.csv("councilcomp_cw_based_on_newspapers.csv",stringsAsFactors = F)

northeast <- c(09,23,25,33,44,50,34,36,42)
south <- c(10,11,12,13,24,37,45,51,54,01,21,28,47,05,22,40,48)
west <- c(04,08,16,30,32,35,49,56,02,06,15,41,53)
midwest <- c(18,17,26,39,55,19,31,20,38,27,46,29)

b <- read.csv("councilcomp_export.csv",stringsAsFactors = F)
## drop states with all nonpartisan elections
b <- subset(b, state!="California")
b <- subset(b, state!="CA")
b <- subset(b, state!="Minnesota" )
b <- subset(b, state!="MN" )
b <- subset(b, state!="Wisconsin" )
b <- subset(b, state!="WI" )
b <- subset(b, state!="Alaksa" )
b <- subset(b, state!="AK" )
b <- subset(b, state!="South Dakota" )
b <- subset(b, state!="SD" )
b <- subset(b, state!="North Dakota" )
b <- subset(b, state!="ND" )
b <- subset(b, state!="Louisiana" )
b <- subset(b, state!="LA" )
b <- subset(b, fips!="12073" )
b <- subset(b, fips!="12127" )
length(unique(b$fips)) # 496
length(unique(b$fips[b$fips %in% bigger$Geo_FIPS])) # 297

b$bea_region <- NULL
b$bea_region <- ifelse(b$fips_state %in% northeast,"Northeast",
                       ifelse(b$fips_state %in% west,"West",
                              ifelse(b$fips_state %in% south,"South",
                                     ifelse(b$fips_state %in% midwest,"Midwest",NA))))

length(unique(b$fips)) # 496

# countysum <- ddply(b[b$year>=1989,], ~fips.x,summarize,
#                    yearspan=length(unique(year))
# )
# b$yearspan <- countysum$yearspan[match(b$fips.x,countysum$fips.x)]
countysum <- b[b$year>1989,] %>% group_by(fips) %>% 
	summarize(yearspan=length(unique(year))
	)
b$yearspan <- countysum$yearspan[match(b$fips,countysum$fips)]



yearsum <- b[b$year>1989 & b$year<=2014 & b$yearspan>24,] %>%
	group_by(year) %>%
	summarize(
		meandem=mean(demshare_council,na.rm=T),
		sedem=sd(demshare_council,na.rm=T)/sqrt(length(demshare_council)))
writeLines(as.character(round(yearsum$meandem[yearsum$year==1990]*100,0)),"Tables/meandem_1990.tex")
writeLines(as.character(round(yearsum$meandem[yearsum$year==2014]*100,0)),"Tables/meandem_2014.tex")

year_region_summ <- b[b$year>1989 & b$year<=2014 & b$yearspan>24,] %>%
	group_by(year,bea_region) %>%
	summarize(
		meandem=mean(demshare_council,na.rm=T),
		sedem=sd(demshare_council)/sqrt(length(demshare_council)))

year_region_summ2 <- b[b$year>1989 & b$year<=2014 & b$yearspan>24,] %>%
	group_by(year,as.numeric(bea_region=="South")) %>%
	summarize(
		meandem=mean(demshare_council,na.rm=T),
		sedem=sd(demshare_council)/sqrt(length(demshare_council)))
names(year_region_summ2)[2] <- "bea_region"

(p <- ggplot(b[which(b$year>1989 & b$year<=2014 & b$yearspan>24),]) +
            geom_hline(yintercept = 0.5,linetype="dashed",alpha=0.5) + 
            geom_line(data = year_region_summ,size=1.5,
                      aes(x=year,y=meandem,group=bea_region,col=factor(bea_region),linetype=factor(bea_region)),
                      # position=position_dodge(width = 0.5),
                      alpha=1) +
            geom_ribbon(data=yearsum,aes(x = year, 
                            ymin = meandem - 1.96*sedem, ymax = meandem + 1.96*sedem, height=.1),
                        colour = "gray",alpha=0.1) +
            geom_line(data=yearsum,aes(x=year,y=meandem),size=1.5,col="black") +
            expand_limits(y=c(0.3,0.7)) +
            scale_linetype_manual(breaks=c("Northeast","West","South","Midwest"),
                                  values=c("dashed","dotdash","longdash", "dotted")) +
            scale_y_continuous(breaks=seq(0,1,0.1),labels=seq(0,1,0.1)) +
            scale_x_continuous(breaks=seq(1990,2014,2),labels=seq(1990,2014,2)) +
            theme_bw() +
            labs(x = "Year",
                 y = "Democratic Share of County Council") +
            theme(axis.text = element_text(size=15),axis.title = element_text(size=20),axis.ticks.length=unit(0.5, "cm")) +
            theme(legend.position="none") + 
            geom_text_repel(data=year_region_summ[which(year_region_summ$year==1990 & year_region_summ$bea_region %in% c("Midwest")),],
                            segment.size = 0.5,min.segment.length = unit(0.5, "lines"),arrow = arrow(length = unit(0.01, 'npc')),
                            point.padding = unit(1,"lines"),nudge_y = -0.04,
                            aes(x=1990,y=meandem,label = bea_region),
                            size = 6.5) + 
            geom_text_repel(data=year_region_summ[which(year_region_summ$year==1990 & year_region_summ$bea_region %in% c("South")),],
                            segment.size = 0.5,min.segment.length = unit(0.5, "lines"),arrow = arrow(length = unit(0.01, 'npc')),
                            point.padding = unit(1,"lines"),nudge_y = 0.04,
                            aes(x=1990,y=meandem,label = bea_region),
                            size = 6.5) + 
            geom_text_repel(data=year_region_summ[which(year_region_summ$year==2010 & year_region_summ$bea_region %in% c("Northeast")),],
                            segment.size = 0.5,min.segment.length = unit(0.5, "lines"),arrow = arrow(length = unit(0.01, 'npc')),
                            point.padding = unit(1,"lines"),
                            aes(x=2010,y=meandem,label = bea_region),
                            size = 6.5) + 
            geom_text_repel(data=year_region_summ[which(year_region_summ$year==2012 & year_region_summ$bea_region %in% c("West")),],
                            segment.size = 0.5,min.segment.length = unit(0.5, "lines"),arrow = arrow(length = unit(0.01, 'npc')),
                            point.padding = unit(1,"lines"),nudge_y = -0.04,
                            aes(x=2012,y=meandem,label = bea_region),
                            size = 6.5))
# ggsave("councilshare_overtime_regions.pdf",p,height=7,width=14)

# Binary South/Nonsouth version:
(p2 <- ggplot(b[b$year>1989 & b$year<=2014 & b$yearspan>24,]) +
            geom_hline(yintercept = 0.5,linetype="dashed",alpha=0.5) + 
            geom_line(data = year_region_summ2,size=1.5,
                      aes(x=year,y=meandem,group=bea_region,linetype=factor(bea_region)),
                      # position=position_dodge(width = 0.5),
                      alpha=1) +
            geom_ribbon(data=yearsum,aes(x = year, 
                                         ymin = meandem - 1.96*sedem, ymax = meandem + 1.96*sedem, height=.1),
                        colour = "gray",alpha=0.1) +
            geom_line(data=yearsum,aes(x=year,y=meandem),size=1.5,col="black") +
            expand_limits(y=c(0.3,0.7)) +
            scale_linetype_manual(breaks=c(0,1),
                                  values=c("dotted","dashed")) +
            scale_y_continuous(breaks=seq(0,1,0.1),labels=seq(0,1,0.1)) +
            scale_x_continuous(breaks=seq(1990,2014,2),labels=seq(1990,2014,2)) +
            theme_bw() +
            labs(x = "Year",
                 y = "Democratic Share of County Council") +
            theme(axis.text = element_text(size=15),axis.title = element_text(size=20),axis.ticks.length=unit(0.5, "cm")) +
            theme(legend.position="none") + 
            geom_text_repel(data=year_region_summ2[which(year_region_summ2$year==1993 & year_region_summ2[,2]==0),],
                            segment.size = 0.5,min.segment.length = unit(0.5, "lines"),arrow = arrow(length = unit(0.01, 'npc')),
                            point.padding = unit(1,"lines"),nudge_y = -0.04,
                            aes(x=1993,y=meandem,label = "Non-South"),
                            size = 6.5) + 
            geom_text_repel(data=year_region_summ2[which(year_region_summ2$year==1994 & year_region_summ2[,2]==1),],
                            segment.size = 0.5,min.segment.length = unit(0.5, "lines"),arrow = arrow(length = unit(0.01, 'npc')),
                            point.padding = unit(1,"lines"),nudge_y = 0.04,
                            aes(x=1994,y=meandem,label = "South"),
                            size = 6.5) 
)
####
## Figure 2: Average National and Regional-level Democratic Seat Share Across Time
####
ggsave("Figures/figure2_councilshare_overtime_south.pdf",p2,height=7,width=14)


